# Data in
# Waste Data
data.waste = read_csv('../data/ww_data_all.csv')
# Case Data
data.cases = read_csv('../data/cases_site.csv')
# Site Data
# https://stackoverflow.com/questions/1253499/simple-calculations-for-working-with-lat-lon-and-km-distance#:~:text=The%20approximate%20conversions%20are%3A,111.320*cos(latitude)%20km
minutes_to_km = 110.574
consider_dist = 80
location_coords = tibble(
SampleLocation = c('Auckland', 'Wellington', 'Christchurch'),
Latitude = -c(36.8508, 41.2842, 43.5299),
Longitude = c(174.7645, 174.7775, 172.6333)
)
data.sites = read_csv('../data/sites.csv') %>%
mutate(dist_akld = sqrt((Latitude - location_coords[[1,2]])^2 + (Longitude - location_coords[[1,3]])^2) * minutes_to_km,
dist_wgtn = sqrt((Latitude - location_coords[[2,2]])^2 + (Longitude - location_coords[[2,3]])^2) * minutes_to_km,
dist_chch = sqrt((Latitude - location_coords[[3,2]])^2 + (Longitude - location_coords[[3,3]])^2) * minutes_to_km,
consider = dist_akld < consider_dist | dist_wgtn < consider_dist | dist_chch < consider_dist)
# Rain Data
data.weather = read_csv('../weather_data/weather_data.csv')
data.rain.sites = read_csv('../weather_data/rain_station_data_final.csv')
# Flow Data
data.flow = read_csv('../weather_data/flow_data.csv')
# Last 2 columns aren't interesting
data.flow = data.flow[,-c(4:5)]
data.flow
data.flow = read_csv('../weather_data/2 - DAILY FLOWS SEPT 2022.csv')
data.flow2 = data.flow %>%
mutate(DATECOLLECTED = dmy(DATECOLLECTED))
# Someone has put the bloody date in mdy format
data.flow[which(is.na(data.flow2$DATECOLLECTED)),]
data.flow2 = data.flow %>%
mutate(DATECOLLECTED1 = dmy(DATECOLLECTED),
DATECOLLECTED2 = mdy(DATECOLLECTED))
## Warning: 978 failed to parse.
data.flow3 = data.flow2 %>%
mutate(DATECOLLECTED = case_when(is.na(DATECOLLECTED1) ~ DATECOLLECTED2,
TRUE ~ DATECOLLECTED1))
data.flow3[which(is.na(data.flow2$DATECOLLECTED)),]
# Love to see it.
data.flow = data.flow3 %>% select(DATECOLLECTED, SAMPLELOCATION = SAMPLELOCATIONSMASTER, daily_flow_rate_m3)
# write_csv(data.flow, '../weather_data/flow_data_clean.csv')
Setup the data.
# Setup Rain data to explain Flow
data.weather.whole = data.weather %>%
left_join(data.rain.sites %>%
rename('Station' = Site), by = 'Station') %>%
pivot_longer(cols = 13:15, names_to = 'Del', values_to = 'catchment') %>%
select(-Del) %>%
filter(!is.na(catchment)) %>%
group_by(catchment) %>%
group_split()
(names(data.weather.whole) = data.weather.whole %>% map(~.$catchment %>% unique() %>% str_replace(' ', ''))) %>% unlist()
## [1] "AU_ArmyBay" "AU_Eastern" "AU_Rosedale" "AU_SouthWestern"
## [5] "AU_Warkworth" "AU_Western" "CA_Ashburton" "CA_Christchurch"
## [9] "CA_Rangiora" "MA_Blenheim" "MW_Levin" "WG_Karori"
## [13] "WG_Masterton" "WG_MoaPoint" "WG_Paraparaumu" "WG_Porirua"
## [17] "WG_Seaview" "WK_Hamilton" "WK_Thames"
weather_model_prep = function(data){
data %>% group_by(Date) %>%
summarise(avg_rain = mean(Amount.mm.)) %>%
mutate(avg_rain.lag1 = lag(avg_rain, 1),
avg_rain.lag2 = lag(avg_rain, 2),
avg_rain.lag3 = lag(avg_rain, 3),
avg_rain.lag4 = lag(avg_rain, 4),
avg_rain.lag5 = lag(avg_rain, 5),
avg_rain.lag6 = lag(avg_rain, 6),
avg_rain.lag7 = lag(avg_rain, 7))
}
# Locations mapped.
data.weather.whole.model = data.weather.whole %>% map(weather_model_prep)
data.waste.whole = data.waste %>%
filter(SampleLocation %in% names(data.weather.whole)) %>%
arrange(SampleLocation) %>%
rename('Date' = Collected) %>%
group_by(SampleLocation) %>%
group_split()
# Check names match
names(data.waste.whole) = data.waste.whole %>% map(~.$SampleLocation %>% unique()) %>% unlist()
# Get population estimates
# Old population estimates (ESR)
pop_names = map_chr(data.waste.whole, ~.$SampleLocation[1] %>% str_sub(4, -1))
data.population_old = data.sites %>% mutate(catchment = SampleLocation %>% str_sub(4,-1)) %>% filter(catchment %in% pop_names) %>%
select(catchment, Population) %>%
arrange(catchment)
# New Estimates (Mackay)
data.population = read_csv('../SitePopulationData.csv')
# These Sites have macron a's and o's that read_csv hasn't recognised. We'll replace these manually.
data.population[data.population$Site %>% str_detect('\\?'),]$Site = c('Opotiki', 'Taupo', 'Whakatane', 'Whangarei')
data.population = data.population %>%
rename('catchment' = Site, 'Population' = `Population Size`) %>%
mutate(catchment = catchment %>% str_replace_all(' ', '')) %>%
# The Eastern and South Western Interceptors are locations of interest but have 'Mangere' and 'Interceptor' attached. Remove this too
mutate(catchment = ifelse(catchment %>% str_detect('stern') & Region == 'Auckland', str_extract(catchment, '.*stern') %>% str_sub(8,-1), catchment))
data.population = data.population %>%
arrange(catchment)
# datapops joined because Mackay doesnt have all locations of interest. Prefer Mackay's, but will use ESR's if necessary
data.population_joined = data.population %>%
rbind(data.population_old %>%
cbind(data.frame(Region = rep(NA, nrow(data.population_old)), Island = rep(NA, nrow(data.population_old))))) %>%
select(-(2:3)) %>%
# ID's have different capitalization
mutate(catchment_lower = tolower(catchment)) %>%
distinct(catchment_lower, .keep_all = TRUE) %>%
arrange(catchment) %>%
filter(catchment_lower %in% tolower(pop_names)) %>%
select(-3)
# Southwestern is causing issues. Just fix the damn name
data.population_joined[data.population_joined$catchment == 'Southwestern',]$catchment = 'SouthWestern'
# Create the final modelling dataset
model.data = data.waste.whole %>%
map2(data.weather.whole.model, inner_join, by = c('Date')) %>%
map2(data.population_joined$Population, cbind) %>%
map(rename, 'population' = `.y[[i]]`) %>%
map(mutate, sars_per_person = sars_gcl / population)
names(model.data) = names(pop_names)
model.data$CA_Christchurch
Join with flow.
data.flow.split = data.flow %>%
rename('Date' = DATECOLLECTED) %>%
group_by(SAMPLELOCATION) %>%
group_split()
# Flow locations
model.data.flow = model.data[map_chr(data.flow.split, ~.$SAMPLELOCATION[[1]])]
# Join the flow estimates into the rain and waste data.
model.data.flow = map2(model.data.flow, data.flow.split, left_join, by = 'Date')
First I am interested in seeing how much rain explains the daily flow.
model.data.flow_joined = model.data.flow %>%
do.call(rbind, .) %>%
as_tibble()
# Flow over time
model.data.flow_joined %>%
ggplot(aes(x = Date, y = daily_flow_rate_m3)) +
geom_point() +
facet_wrap(~SampleLocation, scales = 'free_y')
## Warning: Removed 446 rows containing missing values (`geom_point()`).
Flow seems to vary over time for all locations. No data for AU_Western.
# Rain in line with Flow
normalise <- function(v){
(v - min(v, na.rm = T)) / (max(v, na.rm = T) - min(v, na.rm = T))
}
# Normalise the columns
model.data.flow_joined %>%
mutate(cumulative_rain_7days = avg_rain + avg_rain.lag1 + avg_rain.lag2 + avg_rain.lag3 + avg_rain.lag4 + avg_rain.lag5 + avg_rain.lag6,
cumulative_rain_7days.normal = normalise(cumulative_rain_7days),
daily_flow_rate_m3.normal = normalise(daily_flow_rate_m3)) %>%
ggplot(aes(x = Date, y = cumulative_rain_7days.normal)) +
geom_line(alpha = 0.3, col = 'blue') +
geom_point(aes(y = daily_flow_rate_m3.normal)) +
facet_wrap(~SampleLocation, scales = 'free_y')
## Warning: Removed 446 rows containing missing values (`geom_point()`).
Rain over the past week seems to be reasonably correlated with flow in those places which have varied flow as previously identified.
model.data.flow_joined %>%
mutate(cumulative_rain_7days = avg_rain + avg_rain.lag1 + avg_rain.lag2 + avg_rain.lag3 + avg_rain.lag4 + avg_rain.lag5 + avg_rain.lag6) %>%
ggplot(aes(x = cumulative_rain_7days, y = daily_flow_rate_m3)) +
geom_point(alpha = 0.3) +
facet_wrap(~SampleLocation, scales = 'free_y') +
labs(x = 'Total Rain over the past week (ml)')
## Warning: Removed 446 rows containing missing values (`geom_point()`).
In all locations, there seems to be a somewhat strong positive relationship between the amount of rain over the past 7 days and the amount of flow.
coef(rain_flow.fit2)[names(coef(rain_flow.fit2)) %>% str_detect('rain')] %>%
cbind(rain = names(.), effect_percent = (exp(.) - 1)*100) %>%
as_tibble() %>%
select(-1) %>%
mutate(effect_percent = as.numeric(effect_percent),
cumulative_percent = cumsum(effect_percent),
day = 0:7) %>%
ggplot(aes(x = day, y = effect_percent)) +
geom_point(size = 3) +
geom_smooth(se = F, aes(col = 'Individual Effect')) +
geom_smooth(aes(y = cumulative_percent, col = 'Cumulative Effect'), se = F) +
geom_point(aes(y = cumulative_percent), col = alpha('red', 0.7), pch = 5) +
theme_bw() +
labs(title = "Effect and Cumulative effect of a week's rain on Wastewater flow.",
x = "Day's prior", y = '% Increase in flow', colour = '') +
scale_x_continuous(breaks = 0:7) +
scale_y_continuous(breaks = seq(0, 5, 0.2)) +
theme(legend.position = 'top') +
scale_color_discrete()
source('../produce_modelling_summaries.R')
models = map(model.data.flow, produce_rainsummary.gam)
map(models, print_rainsummary.gam)
##
##
## Table: AU_Eastern
##
## | Concentration Loss %| 97.5%| 2.5%| P-Value|
## |--------------------:|-----:|------:|---------:|
## | 1.155| 6.155| -4.111| 0.6543066|
##
##
## Table: AU_Rosedale
##
## | Concentration Loss %| 97.5%| 2.5%| P-Value|
## |--------------------:|------:|-----:|---------:|
## | 6.691| 10.339| 2.895| 0.0005142|
##
##
## Table: AU_SouthWestern
##
## | Concentration Loss %| 97.5%| 2.5%| P-Value|
## |--------------------:|-----:|------:|---------:|
## | 0.811| 6.799| -5.561| 0.7935892|
##
##
## Table: AU_Western
##
## | Concentration Loss %| 97.5%| 2.5%| P-Value|
## |--------------------:|-----:|------:|---------:|
## | 2.997| 7.424| -1.641| 0.1925965|
##
##
## Table: CA_Christchurch
##
## | Concentration Loss %| 97.5%| 2.5%| P-Value|
## |--------------------:|------:|-----:|--------:|
## | 10.134| 15.778| 4.111| 0.000987|
##
##
## Table: WG_Karori
##
## | Concentration Loss %| 97.5%| 2.5%| P-Value|
## |--------------------:|------:|------:|---------:|
## | 5.273| 11.075| -0.906| 0.0864477|
##
##
## Table: WG_MoaPoint
##
## | Concentration Loss %| 97.5%| 2.5%| P-Value|
## |--------------------:|------:|-----:|---------:|
## | 8.433| 14.239| 2.234| 0.0071502|
##
##
## Table: WG_Porirua
##
## | Concentration Loss %| 97.5%| 2.5%| P-Value|
## |--------------------:|------:|-----:|---------:|
## | 7.283| 11.597| 2.758| 0.0015044|
##
##
## Table: WG_Seaview
##
## | Concentration Loss %| 97.5%| 2.5%| P-Value|
## |--------------------:|------:|-----:|-------:|
## | 8.944| 12.566| 5.172| 3.9e-06|
## $AU_Eastern
##
## $AU_Rosedale
##
## $AU_SouthWestern
##
## $AU_Western
##
## $CA_Christchurch
##
## $WG_Karori
##
## $WG_MoaPoint
##
## $WG_Porirua
##
## $WG_Seaview
In almost every location, rain proved to explain some of the variance
in the observed concentration of COVID-19 bodies.
In AU_Western, this effect is not totally significant and
in AU_Eastern rain appears to have also no effect let alone
MLE positive effect on the concentration. These are the locations that I
expect to see more variance explained when we factor in flow.
# Can't map some locations
map_dbl(model.data.flow, ~filter(., !is.na(daily_flow_rate_m3)) %>% nrow())
## AU_Eastern AU_Rosedale AU_SouthWestern AU_Western CA_Christchurch
## 39 71 38 0 58
## WG_Karori WG_MoaPoint WG_Porirua WG_Seaview
## 47 70 71 71
models = map(model.data.flow[-4], produce_flowrainsummary.gam)
map(models, print_flowrainsummary.gam)
##
##
## Table: AU_Eastern
##
## |Quantile | Flow Rate| % Concentration Loss Explained| 97.5%| 2.5%| P-Value|
## |:---------|---------:|------------------------------:|--------:|---------:|---------:|
## |Lower 25% | 1824.969| 10.48740| 75.97150| -233.4584| 0.8677053|
## |Median | 2220.906| 12.61333| 82.36515| -333.0307| 0.8677053|
## |Upper 75% | 3137.688| 17.34408| 91.38457| -692.9963| 0.8677053|
##
##
## Table: AU_Rosedale
##
## |Quantile | Flow Rate| % Concentration Loss Explained| 97.5%| 2.5%| P-Value|
## |:---------|---------:|------------------------------:|--------:|---------:|---------:|
## |Lower 25% | 49409.5| 47.75869| 76.17781| -14.56355| 0.1041809|
## |Median | 54654.0| 51.23783| 79.54252| -16.22883| 0.1041809|
## |Upper 75% | 67712.5| 58.92698| 85.99788| -20.48121| 0.1041809|
##
##
## Table: AU_SouthWestern
##
## |Quantile | Flow Rate| % Concentration Loss Explained| 97.5%| 2.5%| P-Value|
## |:---------|---------:|------------------------------:|--------:|---------:|--------:|
## |Lower 25% | 322.9357| -32.74890| 43.03762| -209.3668| 0.509977|
## |Median | 362.0000| -37.37683| 46.78641| -254.6536| 0.509977|
## |Upper 75% | 547.7163| -61.68400| 61.49968| -579.0000| 0.509977|
##
##
## Table: CA_Christchurch
##
## |Quantile | Flow Rate| % Concentration Loss Explained| 97.5%| 2.5%| P-Value|
## |:---------|---------:|------------------------------:|--------:|---------:|---------:|
## |Lower 25% | 138267.6| 73.84796| 94.29508| -19.88420| 0.0854828|
## |Median | 146598.6| 75.87827| 95.19925| -21.20140| 0.0854828|
## |Upper 75% | 163598.7| 79.54543| 96.62410| -23.93428| 0.0854828|
##
##
## Table: WG_Karori
##
## |Quantile | Flow Rate| % Concentration Loss Explained| 97.5%| 2.5%| P-Value|
## |:---------|---------:|------------------------------:|--------:|---------:|---------:|
## |Lower 25% | 3384.082| 30.38666| 56.99065| -12.67358| 0.1430274|
## |Median | 3781.303| 33.28434| 61.04613| -14.26282| 0.1430274|
## |Upper 75% | 4829.937| 40.36757| 70.00825| -18.56682| 0.1430274|
##
##
## Table: WG_MoaPoint
##
## |Quantile | Flow Rate| % Concentration Loss Explained| 97.5%| 2.5%| P-Value|
## |:---------|---------:|------------------------------:|---------:|---------:|---------:|
## |Lower 25% | 63208.56| -154.8860| -30.57718| -397.5363| 0.0072714|
## |Median | 67393.95| -171.1767| -32.90444| -453.3059| 0.0072714|
## |Upper 75% | 83168.15| -242.4990| -42.05453| -725.7784| 0.0072714|
##
##
## Table: WG_Porirua
##
## |Quantile | Flow Rate| % Concentration Loss Explained| 97.5%| 2.5%| P-Value|
## |:---------|---------:|------------------------------:|--------:|----------:|---------:|
## |Lower 25% | 21285.94| -18.59088| 13.55159| -62.68426| 0.2857408|
## |Median | 24401.17| -21.58746| 15.37448| -74.69330| 0.2857408|
## |Upper 75% | 35960.33| -33.38339| 21.80883| -127.53372| 0.2857408|
##
##
## Table: WG_Seaview
##
## |Quantile | Flow Rate| % Concentration Loss Explained| 97.5%| 2.5%| P-Value|
## |:---------|---------:|------------------------------:|--------:|----------:|---------:|
## |Lower 25% | 50866.01| -0.931197| 39.33606| -67.92687| 0.9710945|
## |Median | 57099.18| -1.045902| 42.94013| -78.93967| 0.9710945|
## |Upper 75% | 76807.93| -1.409446| 52.98633| -118.74222| 0.9710945|
## $AU_Eastern
##
## $AU_Rosedale
##
## $AU_SouthWestern
##
## $CA_Christchurch
##
## $WG_Karori
##
## $WG_MoaPoint
##
## $WG_Porirua
##
## $WG_Seaview
Unless my modelling techniques are incorrect, this is saying that in
almost every case it is ideal to have good flow estimates.
So, is flow useful in all locations?
map(models, print_flowsummary.gam)
##
##
## Table: AU_Eastern
##
## |Quantile | Flow Rate| % Concentration Loss Explained| 97.5%| 2.5%| P-Value|
## |:---------|---------:|------------------------------:|--------:|---------:|---------:|
## |Lower 25% | 1824.969| 10.48740| 75.97150| -233.4584| 0.8677053|
## |Median | 2220.906| 12.61333| 82.36515| -333.0307| 0.8677053|
## |Upper 75% | 3137.688| 17.34408| 91.38457| -692.9963| 0.8677053|
## NULL
## NULL
##
##
## Table: AU_Rosedale
##
## |Quantile | Flow Rate| % Concentration Loss Explained| 97.5%| 2.5%| P-Value|
## |:---------|---------:|------------------------------:|--------:|---------:|--------:|
## |Lower 25% | 49187.00| 46.85721| 73.49965| -6.570539| 0.074627|
## |Median | 54386.50| 50.29256| 76.97052| -7.289854| 0.074627|
## |Upper 75% | 66220.25| 57.30599| 83.26884| -8.945124| 0.074627|
## NULL
## NULL
##
##
## Table: AU_SouthWestern
##
## |Quantile | Flow Rate| % Concentration Loss Explained| 97.5%| 2.5%| P-Value|
## |:---------|---------:|------------------------------:|--------:|---------:|--------:|
## |Lower 25% | 322.9357| -32.74890| 43.03762| -209.3668| 0.509977|
## |Median | 362.0000| -37.37683| 46.78641| -254.6536| 0.509977|
## |Upper 75% | 547.7163| -61.68400| 61.49968| -579.0000| 0.509977|
## NULL
## NULL
##
##
## Table: CA_Christchurch
##
## |Quantile | Flow Rate| % Concentration Loss Explained| 97.5%| 2.5%| P-Value|
## |:---------|---------:|------------------------------:|--------:|---------:|---------:|
## |Lower 25% | 138367.9| 75.04558| 93.80432| -0.509136| 0.0518602|
## |Median | 145670.9| 76.80848| 94.65021| -0.536080| 0.0518602|
## |Upper 75% | 158901.4| 79.69114| 95.89949| -0.584911| 0.0518602|
## NULL
## NULL
##
##
## Table: WG_Karori
##
## |Quantile | Flow Rate| % Concentration Loss Explained| 97.5%| 2.5%| P-Value|
## |:---------|---------:|------------------------------:|--------:|---------:|---------:|
## |Lower 25% | 3381.513| 30.50597| 58.32700| -15.88846| 0.1651138|
## |Median | 3674.000| 32.65946| 61.36563| -17.37602| 0.1651138|
## |Upper 75% | 4651.875| 39.38633| 70.00544| -22.48945| 0.1651138|
## NULL
## NULL
##
##
## Table: WG_MoaPoint
##
## |Quantile | Flow Rate| % Concentration Loss Explained| 97.5%| 2.5%| P-Value|
## |:---------|---------:|------------------------------:|---------:|---------:|--------:|
## |Lower 25% | 62880.68| -151.2889| -24.87733| -405.6649| 0.010886|
## |Median | 66666.72| -165.6242| -26.55895| -457.4968| 0.010886|
## |Upper 75% | 82595.57| -235.4586| -33.88562| -740.5121| 0.010886|
## NULL
## NULL
##
##
## Table: WG_Porirua
##
## |Quantile | Flow Rate| % Concentration Loss Explained| 97.5%| 2.5%| P-Value|
## |:---------|---------:|------------------------------:|--------:|----------:|---------:|
## |Lower 25% | 21096.88| -18.70439| 11.78726| -59.73580| 0.2528757|
## |Median | 23933.78| -21.47316| 13.26250| -70.11938| 0.2528757|
## |Upper 75% | 35886.58| -33.86616| 19.21202| -121.81703| 0.2528757|
## NULL
## NULL
##
##
## Table: WG_Seaview
##
## |Quantile | Flow Rate| % Concentration Loss Explained| 97.5%| 2.5%| P-Value|
## |:---------|---------:|------------------------------:|--------:|----------:|---------:|
## |Lower 25% | 49854.94| -8.061532| 31.63517| -70.80850| 0.7361082|
## |Median | 55110.34| -8.948315| 34.32171| -80.72539| 0.7361082|
## |Upper 75% | 75664.01| -12.486939| 43.85276| -125.35945| 0.7361082|
## NULL
## NULL
## $AU_Eastern
## NULL
##
## $AU_Rosedale
## NULL
##
## $AU_SouthWestern
## NULL
##
## $CA_Christchurch
## NULL
##
## $WG_Karori
## NULL
##
## $WG_MoaPoint
## NULL
##
## $WG_Porirua
## NULL
##
## $WG_Seaview
## NULL